*** PREPARE NORWEGIAN DATA FROM MEDIA CORPUS AND COMBINE W/TON DATA ***

*** 	This code aggregates from article level visibility..	***
*** 	...to visibility on the level of MP X month 			***

***		Plus, adds metadata and combines all into...			***
***		...monthly & quarterly datasets ready for analysis 		***




********************************************************************************
**************** Aggregate media data, creating newscount **********************

** Read the article-level media data
use "article_level_norway.dta", clear

** Collapse to one row per MP and month; newscount is the sum of actor_occ
collapse (sum) newscount = actor_occ, by(actor_id yearmonth)

** Make yearmonth numeric and give the MP identifier its short name
destring yearmonth, replace
rename actor_id actor

** Write the MP X month file
save "mp_X_month norway.dta", replace
********************************************************************************




********************************************************************************
**************** Combine newscount with ToN data **************************

** Start from the ToN MP X month file
use "..\Prepare ToN data\ton_mp_X_month.dta", clear

** Restrict the time window: drop years before 2000 (before the media data)
** and months after 201603
destring year, replace
drop if year < 2000 | yearmonth > 201603

** Attach the monthly newscount, matching on month and MP
merge 1:1 yearmonth actor using "mp_X_month norway.dta"

** Discard media-only rows (no ToN counterpart). Per the original note these are
** mainly months not covered in ToN, but also party leaders (should be dropped),
** suppleants and MPs with no name match in ToN
keep if _merge != 2

** ToN rows without a media match had no media presence: set newscount to 0
replace newscount = 0 if _merge == 1
drop _merge

** Overwrite the MP X month file with the combined data
save "mp_X_month norway.dta", replace
********************************************************************************




********************************************************************************
**************** Add age, years of experience and gender ************************

** Load article level dataset again to extract MP-level attributes
use "article_level_norway.dta", clear

** Aggregate to one row per MP (not per month), extracting gender and birth year.
** firstnm takes the first non-missing value, so a missing value on an MP's
** first article row does not blank out the attribute for that MP
collapse ///
	(firstnm) fem = gender year_of_birth = year_of_birth ///
	, by(actor_id)

** Labelling gender variable (assumes gender is coded 0 = men, 1 = women - TODO confirm)
label define fem 0 "Men" 1 "Women"
label values fem fem

** Add MP experience. All rows are kept, whether matched or not
** (first_year, used below, is presumably supplied by this file - verify)
merge 1:1 actor_id using "mps_first_year_no.dta"
drop _merge

** Rename to match the key used in the MP X month file
rename actor_id actor

** Save
save "gender_and_experience.dta", replace

** Merge the MP-level attributes onto the MP X month panel
use "mp_X_month norway.dta", clear
merge m:1 actor using "gender_and_experience.dta"

** Drop MPs that exist only in the attribute file. Without this they would be
** added to the panel as rows with no yearmonth, re-introducing actors that
** were deliberately removed when the panel was built
drop if _merge==2
drop _merge

** Create age variable
gen age = year - year_of_birth
drop year_of_birth

** Create experience variable (years since first_year)
gen mp_exp = year - first_year
drop first_year

** Save
save "mp_X_month norway.dta", replace
********************************************************************************




********************************************************************************
**************** Add meta data on parties, cabinets...  ************************

** Load party and cabinet meta data
use "cabinet party meta data no.dta", clear

** Rename to the variable names used in the analysis files
rename party_seat_share seat_share
rename cabinet_seat_share share_of_parliament
rename election_month electionperiod

** Party extremity: absolute distance of the party left-right score from 5
generate party_extremity = abs(party_left_right- 5)

** Right-leaning cabinet dummy: 1 if the weighted cabinet left-right score is
** above 5, 0 otherwise, missing if the score is missing.
** A threshold comparison is used instead of recode ranges so that scores
** strictly between 5 and 5.1 are classified too, rather than left unrecoded
generate byte cabinet_right = cabinet_left_right_weighted > 5 if !missing(cabinet_left_right_weighted)

** Drop variables not needed after the merge (year is dropped here, so the
** year variable already in the MP X month file is the one kept)
drop cabinet_id year cabinet_seats parliament_seats_total cabinet_left_right_weighted ///
	cabinet_left_right_unweighted election_id year_election yearmonth_election party_left_right

** Merge onto the MP X month panel by month and party; keep matched rows only
merge 1:m yearmonth party_id using "mp_X_month norway.dta"
drop if _merge<3
drop _merge

** Save
save "mp_X_month norway.dta", replace
********************************************************************************




********************************************************************************
**************** Add months from election tally  *******************************

** Start from the election tally file (one row per yearmonth, as required by the 1:m merge)
use "tally_months_no.dta", clear

** Join the MP X month panel on yearmonth and retain only rows present in both files
merge 1:m yearmonth using "mp_X_month norway.dta"
keep if _merge == 3
drop _merge

** Overwrite the MP X month file
save "mp_X_month norway.dta", replace
********************************************************************************




********************************************************************************
***************** Monthly file ready for analysis, save ************************

** Align variable names with the UK analysis so output is comparable
rename list_number_relative_inv electoral_safety
rename turnout turnout_perc

** Make month numeric
destring month, replace

** Replace the string actor identifier with a labelled numeric code
encode actor, gen(actor_enc)
drop actor
rename actor_enc actor

** Same for comt_leader; encoded value 2 is then set to 0, value 1 is kept
encode comt_leader, gen(comt_leader_enc)
replace comt_leader_enc = 0 if comt_leader_enc == 2
drop comt_leader
rename comt_leader_enc comt_leader

** Monthly file, version that still includes months with no activity in Storting
save "..\Files for analysis\monthly data norway incl legspeechNAs.dta", replace

** Remove July, August and September (no activity in Storting)
drop if inlist(month, 7, 8, 9)

** Monthly file, version without those months
save "..\Files for analysis\monthly data norway.dta", replace
********************************************************************************




********************************************************************************
**************** Create quarterly dataset  *************************************

** Load monthly dataset (version excluding July-September).
** The option must be clear: use does not accept replace and would stop with an error
use "..\Files for analysis\monthly data norway.dta", clear

** Create quarterly time variable from the first day of each month
gen qdate = qofd(mdy(month, 1, year))
format qdate %tq

** Aggregate by actor and quarter:
**   sum   - newscount and legspeech are totalled over the quarter's months
**   first - the remaining covariates take the value of the first row in the quarter
**   max   - electionperiod takes the highest value among the quarter's months
collapse ///
	(sum) newscount legspeech ///
	(first) party_id year county comt_hard electoral_safety ///
	county_no distance const_size turnout_perc electorate fem age mp_exp months_from_election ///
	seat_share share_of_parliament cabinet_right ///
	cabinet_party party_extremity comt_leader ///
	(max) electionperiod, ///
	by(actor qdate)

** Re-attach the gender value label after the collapse
label values fem fem

* Create lagged newscount variable
tsset actor qdate
gen l_newscount = l.newscount
* No activity in Storting in 3rd quarter -> use 2 quarter lag to obtain lagged values for 4th quarter
* NOTE(review): the fallback applies to every quarter whose previous quarter is absent, not only the 4th
gen l2_newscount = l2.newscount
replace l_newscount = l2_newscount if l_newscount == .
drop l2_newscount

** Quarterly file ready for analysis, save
save "..\Files for analysis\quarterly data norway.dta", replace
********************************************************************************




